#!/usr/bin/env python
#-*- coding:utf-8 -*-

import xlrd
import re
import json

# Captures the leading run of characters that precedes the first '(' in a
# string; it does not match when the string is empty or begins with '('.
name_re = re.compile(r'^([^(]+)')

def read_xlsx():
    """Return the object decoded from ``amazon_category.json``.

    Despite the name, no spreadsheet is read here: the function only parses
    the JSON file found in the current working directory.
    """
    with open('amazon_category.json') as cache_file:
        return json.loads(cache_file.read())

def gen_xlsx():
    """Build a name -> top-category map from ``amazon_category.xlsx``.

    Reads sheet ``Sheet1``. For every row, ``name`` comes from column index
    3 (or from the last column when the sheet has fewer than four columns)
    and ``top_cate`` comes from column index 1. Rows where either cell is
    empty are skipped. Both values are cut down to the text preceding the
    first ``(`` using ``name_re``; a value the pattern does not match is
    kept as read from the cell. When several rows share a name, the last
    row wins.

    The resulting mapping is written to ``amazon_category.json`` in the
    current working directory and is also returned.

    NOTE(review): assumes both cells hold text; a numeric cell would make
    ``name_re.match`` raise ``TypeError`` -- confirm against the workbook.

    Returns:
        dict: mapping of ``name`` to ``top_cate``.
    """
    workbook = xlrd.open_workbook('amazon_category.xlsx')
    booksheet = workbook.sheet_by_name('Sheet1')
    # The name column is the same for every row, so compute it once.
    name_col = min(booksheet.ncols - 1, 3)

    category_map = {}
    for row in range(booksheet.nrows):
        name = booksheet.cell(row, name_col).value
        top_cate = booksheet.cell(row, 1).value
        if not (top_cate and name):
            continue

        # When the pattern does not match, keep the raw cell value. The
        # previous code assigned an undefined ``value`` here, which raised
        # NameError for any name starting with '('.
        name_match = name_re.match(name)
        if name_match:
            name = name_match.group(1)

        top_match = name_re.match(top_cate)
        if top_match:
            top_cate = top_match.group(1)

        category_map[name] = top_cate

    with open('amazon_category.json', 'w') as f:
        f.write(json.dumps(category_map))
    return category_map


if __name__ == '__main__':
    # Script entry point: loads the cached JSON map; the result is discarded.
    read_xlsx()
